#!/usr/bin/Rscript

# Script parameters. Neither value is referenced in the visible part of this
# script; both are nevertheless global objects, so the ls() snapshot taken
# further down lists them.
# NOTE(review): presumably shared with sibling scripts of the pipeline - confirm.
cut_date <- as.Date("2015-01-31")  # Date object parsed from an ISO yyyy-mm-dd string
interval_days <- 7                 # interval length in days (per the name)

library(igraph)

# Load the input data from .RData files (paths are relative to the working
# directory). load() restores objects under the names they were saved with;
# the code below relies on `author_omega`, `comment` and `thread` existing
# afterwards.
# NOTE(review): presumably one object per file, as the file names suggest - confirm.
load("02_15_m5s_forum_apr15_author_omega.RData")
load("02_15_m5s_forum_apr15_comment.RData")
load("02_15_m5s_forum_apr15_thread.RData")


# Vertex table: a copy of the author table whose `id` column is duplicated
# under the name `author_unique_id`.
nodes <- author_omega
nodes$author_unique_id <- nodes$id

# Create dataframe edges: one row per comment, pointing from the comment's
# author to the author being replied to.
# Step 1: author of the parent comment; NA where `parent` matches no commentId.
to_temp <- comment$authorUrl[match(comment$parent, comment$commentId)]
# Step 2: where step 1 gave NA, fall back to the author of the thread the
# comment belongs to (still NA if the threadId is not found in `thread`).
to <- ifelse(is.na(to_temp), thread$authorUrl[match(comment$threadId, thread$threadId)], to_temp)
# Leftover from an earlier version, kept for reference (not executed):
# from <- data.frame(from=comment$authorUrl, createdAt=comment$createdAt)
# Column notes:
#   from / to        - the first two columns; graph.data.frame() reads them as
#                      the edge endpoints, remaining columns become edge attributes
#   date             - createdAt converted to a number (seconds since the epoch);
#                      no tz is given, so the string is parsed in the session's
#                      local time zone
#                      NOTE(review): confirm whether createdAt is really local time
#   author_unique_id - same values as `from` (the comment author's URL)
edges <- data.frame(from=comment$authorUrl,
                    to=to,
                    date=as.numeric(as.POSIXct(comment$createdAt, format="%Y-%m-%dT%H:%M:%S")),
                    thread_id = comment$threadId,
                    comment_id = comment$commentId,
                    author_unique_id=comment$authorUrl)

# Cleaning: keep only the edges whose target is known (not NA) and present in
# the node table.
# A logical mask is used on purpose: `edges[-which(cond), ]` selects ZERO rows
# when `cond` is never TRUE (negating an empty index is still an empty index),
# which would silently drop every edge of an already clean dataset.
# NOTE(review): `from` is not checked against the node table here - confirm
# that every comment author is guaranteed to be a known node.
edges <- edges[!is.na(edges$to) & edges$to %in% nodes$id, , drop = FALSE]

# Snapshot the names of the objects defined so far, so they can be removed
# later. Names listed in `mantain` are excluded from the snapshot (the list is
# currently empty). The snapshot is taken before `mantain` exists, so neither
# `mantain` nor `vector_data_objects` itself ends up in it.
vector_data_objects <- ls()
mantain <- character(0)
vector_data_objects <- setdiff(vector_data_objects, mantain)

# Create the directed graph: `edges` supplies the edges (its first two columns
# are the endpoints, the remaining ones become edge attributes) and `nodes`
# supplies the vertex table.
directreply_graph <- graph.data.frame(edges, directed=TRUE, vertices=nodes)
# Remove non-uniquely identified users, i.e. vertices whose author_unique_id
# contains neither "http" nor "NEWFK" (a missing id is removed as well, since
# grepl() returns FALSE for NA).
directreply_graph <-
  directreply_graph - V(directreply_graph)[!grepl("http|NEWFK", V(directreply_graph)$author_unique_id)]
# Add a static identifier (won't change when vertices are removed later on).
# seq_len() replaces seq(1:n): for an empty graph seq(1:0) yields 1:2, i.e. two
# values for zero vertices, whereas seq_len(0) is empty.
V(directreply_graph)$static_id <- seq_len(vcount(directreply_graph))

save(directreply_graph, file="02_23_m5s_forum_direct_reply.RData")

library(openssl)

# Anonymised copy: overwrite the author identifiers with their MD5 digests and
# save the result under a separate file name. The three replacements are
# independent of each other (each reads and writes its own attribute).
# NOTE(review): only `name` and `author_unique_id` are hashed; every other
# vertex/edge attribute is saved unchanged - confirm none of them is identifying.
E(directreply_graph)$author_unique_id <- md5(E(directreply_graph)$author_unique_id)
V(directreply_graph)$author_unique_id <- md5(V(directreply_graph)$author_unique_id)
V(directreply_graph)$name <- md5(V(directreply_graph)$name)
save(directreply_graph, file="02_23_m5s_forum_direct_reply_anonymised.RData")

